This report presents the results of spectral clustering analysis on Pokemon data, combining statistics, text embeddings, and image features.
library(tidyverse)
library(plotly)
# Load clustering results
results_path <- "q1_clustering/output/spectral_clustering_results.csv"
clustering_results <- read_csv(results_path)
# Load spectral embedding (20D space where clustering was performed)
embedding_path <- "q1_clustering/output/spectral_embedding.rds"
spectral_embedding <- readRDS(embedding_path)
# Report how many rows each input contributed
n_assigned <- nrow(clustering_results)
cat(sprintf("Loaded %d Pokemon with cluster assignments\n", n_assigned))
## Loaded 948 Pokemon with cluster assignments
embedding_dims <- dim(spectral_embedding)
cat(sprintf("Loaded spectral embedding: %d Pokemon x %d dimensions\n",
            embedding_dims[1], embedding_dims[2]))
## Loaded spectral embedding: 948 Pokemon x 20 dimensions
# Use the first three columns of the spectral embedding for 3D visualization.
# NOTE(review): the messages below describe these columns as the eigenvectors
# of the smallest non-zero graph-Laplacian eigenvalues; that depends on how the
# embedding file was produced - confirm against the clustering script.
if (ncol(spectral_embedding) < 3) {
  stop("Spectral embedding needs at least 3 columns for a 3D plot",
       call. = FALSE)
}
pokemon_names <- rownames(spectral_embedding)
if (is.null(pokemon_names)) {
  # mutate(name = NULL) would silently add no column, and the later join on
  # "name" would then fail with an unrelated error - fail here instead.
  stop("Spectral embedding has no row names to use as Pokemon names",
       call. = FALSE)
}
embedding_3d <- spectral_embedding[, 1:3, drop = FALSE]
# Name the columns up front so as_tibble() never has to repair missing names
colnames(embedding_3d) <- c("Dim1", "Dim2", "Dim3")
spectral_data_3d <- as_tibble(embedding_3d) %>%
  mutate(name = pokemon_names)
cat("Using first 3 dimensions of spectral embedding for 3D visualization\n")
## Using first 3 dimensions of spectral embedding for 3D visualization
cat("These correspond to the smallest non-zero eigenvalues of the graph Laplacian\n")
## These correspond to the smallest non-zero eigenvalues of the graph Laplacian
# Attach each Pokemon's cluster assignment to its 3D coordinates, matching on name
plot_data <- left_join(spectral_data_3d, clustering_results, by = "name")
# Treat cluster ids as categories rather than numbers
plot_data <- mutate(plot_data, cluster = factor(cluster))
n_plotted <- nrow(plot_data)
cat(sprintf("Created plot data with %d Pokemon\n", n_plotted))
## Created plot data with 948 Pokemon
n_clusters <- n_distinct(plot_data$cluster)
cat(sprintf("Number of clusters: %d\n", n_clusters))
## Number of clusters: 18
# Tabulate cluster sizes, largest cluster first
cluster_summary <- count(plot_data, cluster, sort = TRUE)
knitr::kable(
  cluster_summary,
  caption = "Cluster Sizes",
  col.names = c("Cluster", "Number of Pokemon")
)
| Cluster | Number of Pokemon |
|---|---|
| 18 | 135 |
| 5 | 109 |
| 17 | 82 |
| 9 | 73 |
| 13 | 59 |
| 1 | 57 |
| 16 | 51 |
| 14 | 50 |
| 2 | 49 |
| 12 | 49 |
| 15 | 48 |
| 6 | 47 |
| 7 | 41 |
| 10 | 33 |
| 11 | 33 |
| 3 | 30 |
| 4 | 1 |
| 8 | 1 |
# Define high-contrast color palette for 18 clusters
high_contrast_colors <- c(
  "#e6194b", "#3cb44b", "#ffe119", "#4363d8", "#f58231",
  "#911eb4", "#46f0f0", "#f032e6", "#bcf60c", "#fabebe",
  "#008080", "#e6beff", "#9a6324", "#fffac8", "#800000",
  "#aaffc3", "#808000", "#ffd8b1"
)
# Create interactive 3D plotly plot: one marker per Pokemon, colored by cluster
p <- plot_ly(
  data = plot_data,
  x = ~Dim1,
  y = ~Dim2,
  z = ~Dim3,
  color = ~cluster,
  colors = high_contrast_colors,
  text = ~name,
  # Carry the cluster id per point for the hover label. With a factor color
  # mapping each trace has a single constant marker color, so
  # %{marker.color} would print a color string instead of the cluster id.
  customdata = ~as.character(cluster),
  type = "scatter3d",
  mode = "markers",
  marker = list(
    size = 4,
    sizemode = "diameter",
    opacity = 0.85,
    # Dark outline keeps the pale palette entries visible on the light background
    line = list(
      color = "rgba(0, 0, 0, 0.8)",
      width = 1
    )
  ),
  # <extra></extra> suppresses the secondary hover box
  hovertemplate = paste(
    "<b>%{text}</b><br>",
    "Cluster: %{customdata}<br>",
    "Dim 1: %{x:.3f}<br>",
    "Dim 2: %{y:.3f}<br>",
    "Dim 3: %{z:.3f}<br>",
    "<extra></extra>"
  )
) %>%
  layout(
    title = list(
      text = "Pokemon Spectral Clustering - 3D Visualization in Spectral Embedding Space (k=18)<br><sub>First 3 eigenvectors from normalized graph Laplacian</sub>",
      font = list(size = 16)
    ),
    scene = list(
      xaxis = list(
        title = "Spectral Dim 1 (2nd smallest eigenvalue)",
        gridcolor = "#E5E5E5",
        showbackground = TRUE,
        backgroundcolor = "#F8F8F8"
      ),
      yaxis = list(
        title = "Spectral Dim 2 (3rd smallest eigenvalue)",
        gridcolor = "#E5E5E5",
        showbackground = TRUE,
        backgroundcolor = "#F8F8F8"
      ),
      zaxis = list(
        title = "Spectral Dim 3 (4th smallest eigenvalue)",
        gridcolor = "#E5E5E5",
        showbackground = TRUE,
        backgroundcolor = "#F8F8F8"
      ),
      # Initial viewpoint
      camera = list(
        eye = list(x = 1.5, y = 1.5, z = 1.3)
      )
    ),
    paper_bgcolor = "white",
    # Vertical legend placed just outside the right edge of the plot area
    legend = list(
      title = list(text = "Cluster"),
      orientation = "v",
      x = 1.02,
      y = 1
    )
  )
p
# Draw up to five random Pokemon per cluster; the seed fixes the draw
set.seed(42)
sampled_rows <- plot_data %>%
  group_by(cluster) %>%
  slice_sample(n = 5) %>%
  ungroup()
# Keep only the columns shown in the table, ordered by cluster then name
cluster_samples <- sampled_rows %>%
  select(all_of(c("cluster", "name", "Dim1", "Dim2", "Dim3"))) %>%
  arrange(cluster, name)
knitr::kable(
  cluster_samples,
  caption = "Sample Pokemon from Each Cluster (up to 5 per cluster)",
  col.names = c("Cluster", "Pokemon", "Dim 1", "Dim 2", "Dim 3"),
  digits = 3
)
| Cluster | Pokemon | Dim 1 | Dim 2 | Dim 3 |
|---|---|---|---|---|
| 1 | Aerodactyl | -0.008 | -0.003 | -0.016 |
| 1 | Crobat | -0.026 | -0.002 | 0.016 |
| 1 | Natu | -0.032 | 0.019 | -0.012 |
| 1 | Skarmory | -0.045 | 0.033 | 0.014 |
| 1 | Tranquill | 0.005 | 0.003 | -0.016 |
| 2 | Cofagrigus | 0.056 | -0.048 | -0.006 |
| 2 | Duskull | 0.081 | -0.046 | 0.007 |
| 2 | Golurk | 0.019 | 0.011 | 0.025 |
| 2 | Phantump | 0.077 | -0.036 | -0.039 |
| 2 | Spiritomb | 0.120 | -0.017 | -0.015 |
| 3 | Dracovish | 0.008 | -0.002 | -0.004 |
| 3 | Dracozolt | 0.004 | 0.013 | 0.003 |
| 3 | Palkia | 0.038 | 0.016 | -0.017 |
| 3 | Reshiram | 0.046 | 0.051 | -0.064 |
| 3 | Xerneas | 0.007 | 0.019 | -0.012 |
| 4 | Eternatus | -0.027 | -0.128 | 0.084 |
| 5 | Arrokuda | -0.004 | 0.012 | 0.017 |
| 5 | Feebas | -0.006 | -0.010 | 0.023 |
| 5 | Golduck | 0.004 | -0.006 | 0.013 |
| 5 | Inteleon | -0.003 | -0.005 | 0.001 |
| 5 | Slowbro | -0.066 | 0.025 | -0.004 |
| 6 | Carbink | -0.001 | -0.004 | 0.005 |
| 6 | Grimmsnarl | -0.016 | 0.088 | -0.055 |
| 6 | Primarina | 0.007 | 0.045 | -0.015 |
| 6 | Spritzee | -0.011 | 0.067 | -0.023 |
| 6 | Togetic | -0.009 | 0.046 | -0.013 |
| 7 | Carvanha | 0.037 | 0.025 | -0.014 |
| 7 | Poochyena | 0.028 | -0.001 | -0.009 |
| 7 | Scrafty | 0.028 | -0.024 | 0.046 |
| 7 | Thievul | 0.021 | -0.008 | 0.011 |
| 7 | Ting-Lu | 0.053 | 0.039 | 0.013 |
| 8 | Ditto | -0.028 | 0.029 | 0.004 |
| 9 | Cascoon | 0.010 | 0.016 | -0.003 |
| 9 | Dottler | -0.027 | 0.006 | 0.042 |
| 9 | Illumise | -0.010 | -0.007 | 0.042 |
| 9 | Rabsca | -0.021 | 0.008 | 0.022 |
| 9 | Surskit | 0.022 | 0.005 | 0.014 |
| 10 | Articuno | -0.011 | -0.078 | -0.068 |
| 10 | Cryogonal | -0.048 | -0.140 | -0.017 |
| 10 | Glaceon | -0.027 | -0.065 | -0.044 |
| 10 | Kyurem | -0.022 | -0.073 | -0.097 |
| 10 | Snover | -0.040 | -0.068 | -0.073 |
| 11 | Appletun | -0.026 | 0.035 | -0.033 |
| 11 | Axew | -0.052 | 0.014 | -0.012 |
| 11 | Bagon | -0.032 | 0.018 | -0.014 |
| 11 | Gible | -0.047 | 0.067 | 0.032 |
| 11 | Noibat | -0.035 | 0.036 | -0.013 |
| 12 | Conkeldurr | -0.033 | 0.005 | -0.014 |
| 12 | Machoke | -0.029 | 0.002 | -0.013 |
| 12 | Machop | -0.028 | 0.004 | -0.020 |
| 12 | Mankey | -0.005 | -0.018 | 0.009 |
| 12 | Passimian | -0.016 | -0.016 | 0.025 |
| 13 | Armarouge | -0.060 | 0.045 | -0.023 |
| 13 | Blaziken | -0.009 | 0.027 | -0.042 |
| 13 | Carkol | -0.001 | 0.011 | -0.040 |
| 13 | Slugma | 0.034 | 0.030 | -0.053 |
| 13 | Torchic | 0.016 | 0.031 | -0.054 |
| 14 | Armaldo | 0.021 | -0.008 | -0.056 |
| 14 | Garganacl | -0.003 | -0.011 | -0.008 |
| 14 | Mamoswine | -0.047 | -0.042 | 0.013 |
| 14 | Rhyperior | -0.006 | 0.041 | 0.024 |
| 14 | Roggenrola | -0.016 | -0.009 | -0.052 |
| 15 | Beldum | -0.083 | 0.000 | 0.000 |
| 15 | Melmetal | 0.008 | 0.047 | -0.048 |
| 15 | Metang | -0.088 | -0.002 | 0.011 |
| 15 | Perrserker | -0.147 | 0.009 | 0.045 |
| 15 | Revavroom | -0.069 | -0.022 | 0.016 |
| 16 | Chimecho | -0.019 | -0.012 | 0.027 |
| 16 | Cosmoem | 0.032 | 0.009 | 0.035 |
| 16 | Espurr | -0.044 | -0.010 | 0.030 |
| 16 | Jirachi | -0.045 | 0.010 | 0.019 |
| 16 | Wynaut | -0.026 | 0.011 | 0.003 |
| 17 | Arboliva | 0.029 | -0.023 | -0.011 |
| 17 | Chikorita | -0.003 | 0.003 | -0.026 |
| 17 | Fomantis | 0.013 | 0.009 | -0.037 |
| 17 | Jumpluff | -0.001 | 0.014 | 0.016 |
| 17 | Tangela | 0.003 | 0.017 | -0.020 |
| 18 | Furfrou | 0.028 | -0.070 | 0.051 |
| 18 | Luxray | -0.003 | 0.014 | 0.031 |
| 18 | Skuntank | 0.019 | -0.006 | 0.013 |
| 18 | Spinda | 0.005 | -0.040 | 0.014 |
| 18 | Stunfisk | 0.025 | 0.039 | 0.055 |
# Per-cluster size plus mean and standard deviation of each plotted dimension.
# sd() is NA for clusters that contain a single Pokemon.
plot_dims <- c("Dim1", "Dim2", "Dim3")
summary_stats <- plot_data %>%
  group_by(cluster) %>%
  summarise(
    n = n(),
    # Two separate across() calls keep all means ahead of all SDs, which the
    # positional column headers below rely on
    across(all_of(plot_dims), mean, .names = "mean_{.col}"),
    across(all_of(plot_dims), sd, .names = "sd_{.col}"),
    .groups = "drop"
  ) %>%
  arrange(cluster)
knitr::kable(
  summary_stats,
  caption = "Cluster Statistics in 3D Spectral Embedding Space",
  col.names = c("Cluster", "Size", "Mean D1", "Mean D2", "Mean D3",
                "SD D1", "SD D2", "SD D3"),
  digits = 3
)
| Cluster | Size | Mean D1 | Mean D2 | Mean D3 | SD D1 | SD D2 | SD D3 |
|---|---|---|---|---|---|---|---|
| 1 | 57 | -0.008 | 0.012 | -0.011 | 0.020 | 0.017 | 0.020 |
| 2 | 49 | 0.049 | -0.023 | -0.011 | 0.025 | 0.023 | 0.028 |
| 3 | 30 | 0.043 | 0.031 | -0.020 | 0.045 | 0.041 | 0.050 |
| 4 | 1 | -0.027 | -0.128 | 0.084 | NA | NA | NA |
| 5 | 109 | 0.002 | 0.002 | 0.004 | 0.017 | 0.020 | 0.025 |
| 6 | 47 | -0.007 | 0.042 | -0.003 | 0.023 | 0.022 | 0.025 |
| 7 | 41 | 0.033 | -0.004 | 0.004 | 0.022 | 0.030 | 0.027 |
| 8 | 1 | -0.028 | 0.029 | 0.004 | NA | NA | NA |
| 9 | 73 | 0.009 | 0.006 | 0.016 | 0.027 | 0.025 | 0.037 |
| 10 | 33 | -0.049 | -0.084 | -0.056 | 0.030 | 0.019 | 0.025 |
| 11 | 33 | -0.031 | 0.020 | 0.000 | 0.020 | 0.019 | 0.030 |
| 12 | 49 | -0.017 | -0.010 | 0.000 | 0.032 | 0.021 | 0.023 |
| 13 | 59 | 0.005 | 0.024 | -0.029 | 0.022 | 0.017 | 0.018 |
| 14 | 50 | -0.002 | 0.018 | 0.017 | 0.018 | 0.034 | 0.044 |
| 15 | 48 | -0.037 | 0.005 | 0.002 | 0.032 | 0.021 | 0.027 |
| 16 | 51 | -0.033 | 0.001 | 0.017 | 0.022 | 0.015 | 0.025 |
| 17 | 82 | 0.003 | 0.000 | -0.011 | 0.015 | 0.013 | 0.019 |
| 18 | 135 | 0.011 | -0.021 | 0.020 | 0.017 | 0.024 | 0.020 |
The interactive 3D plot shows Pokemon in the first three dimensions of the spectral embedding space - the 20-dimensional space where k-means clustering was actually performed. The three dimensions correspond to:
- Dim 1: eigenvector for the 2nd smallest eigenvalue of the graph Laplacian (the 1st is trivial, ~0)
- Dim 2: eigenvector for the 3rd smallest eigenvalue of the graph Laplacian
- Dim 3: eigenvector for the 4th smallest eigenvalue of the graph Laplacian
Interactive features:
- Rotate: Click and drag to rotate the view
- Zoom: Scroll wheel or pinch to zoom in/out
- Pan: Right-click and drag (or shift+drag)
- Hover: Mouse over points to see Pokemon details
- Filter: Click legend items to show/hide clusters
- Reset: Double-click to reset view
Note: Marker sizes in 3D plotly plots are fixed in screen pixels and don’t scale with zoom. For best viewing:
- Zoom in to see detail in dense cluster regions
- Hide clusters via the legend to reduce visual clutter
- Rotate to find angles that separate overlapping clusters
This 3D spectral visualization is superior to PCA because:
- The spectral embedding preserves graph structure and cluster separation
- K-means was performed in this 20D space, so a 3D projection of it is more representative of the clustering
- It captures manifold structure better than linear PCA
- The additional (third) dimension makes cluster separation easier to see
Report generated on 2025-11-06 08:57:22.234287